-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[InstCombine] Optimize (select %x, op(%x), 0) to op(%x) for operations where op(0) == 0 #147605
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
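@llvm/pr-subscribers-llvm-transforms Author: Ryan Buchner (bababuck) Changes: Currently this optimization only occurs for `mul`, but this generalizes that for any operation that has a fixed point of `0`. There is similar logic within the `EarlyCSE` pass, but that is stricter in terms of `poison` propagation so will not optimize for many operations. Full diff: https://github.com/llvm/llvm-project/pull/147605.diff 4 Files Affected: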
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 73ba0f78e8053..023ca5245f494 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -878,7 +878,11 @@ static Instruction *foldSetClearBits(SelectInst &Sel,
// is a vector consisting of 0 and undefs. If a constant compared with x
// is a scalar undefined value or undefined vector then an expression
// should be already folded into a constant.
-static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) {
+//
+// This also holds for all operations such that Op(0) == 0,
+// e.g. Shl, UMin, etc.
+static Instruction *foldSelectZeroOrFixedOp(SelectInst &SI,
+ InstCombinerImpl &IC) {
auto *CondVal = SI.getCondition();
auto *TrueVal = SI.getTrueValue();
auto *FalseVal = SI.getFalseValue();
@@ -900,9 +904,7 @@ static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) {
// non-zero elements that are masked by undef elements in the compare
// constant.
auto *TrueValC = dyn_cast<Constant>(TrueVal);
- if (TrueValC == nullptr ||
- !match(FalseVal, m_c_Mul(m_Specific(X), m_Value(Y))) ||
- !isa<Instruction>(FalseVal))
+ if (TrueValC == nullptr || !isa<Instruction>(FalseVal))
return nullptr;
auto *ZeroC = cast<Constant>(cast<Instruction>(CondVal)->getOperand(1));
@@ -913,11 +915,28 @@ static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) {
if (!match(MergedC, m_Zero()) && !match(MergedC, m_Undef()))
return nullptr;
- auto *FalseValI = cast<Instruction>(FalseVal);
- auto *FrY = IC.InsertNewInstBefore(new FreezeInst(Y, Y->getName() + ".fr"),
- FalseValI->getIterator());
- IC.replaceOperand(*FalseValI, FalseValI->getOperand(0) == Y ? 0 : 1, FrY);
- return IC.replaceInstUsesWith(SI, FalseValI);
+ if (match(FalseVal, m_c_Mul(m_Specific(X), m_Value(Y))) ||
+ match(FalseVal, m_c_And(m_Specific(X), m_Value(Y))) ||
+ match(FalseVal, m_Shl(m_Specific(X), m_Value(Y))) ||
+ match(FalseVal, m_AShr(m_Specific(X), m_Value(Y))) ||
+ match(FalseVal, m_LShr(m_Specific(X), m_Value(Y))) ||
+ match(FalseVal, m_FShl(m_Specific(X), m_Specific(X), m_Value(Y))) ||
+ match(FalseVal, m_FShr(m_Specific(X), m_Specific(X), m_Value(Y))) ||
+ match(FalseVal, m_SDiv(m_Specific(X), m_Value(Y))) ||
+ match(FalseVal, m_UDiv(m_Specific(X), m_Value(Y))) ||
+ match(FalseVal, m_c_UMin(m_Specific(X), m_Value(Y)))) {
+ auto *FalseValI = cast<Instruction>(FalseVal);
+ auto *FrY = IC.InsertNewInstBefore(new FreezeInst(Y, Y->getName() + ".fr"),
+ FalseValI->getIterator());
+ IC.replaceOperand(*FalseValI,
+ FalseValI->getOperand(0) == Y
+ ? 0
+ : (FalseValI->getOperand(1) == Y ? 1 : 2),
+ FrY);
+ return IC.replaceInstUsesWith(SI, FalseValI);
+ }
+
+ return nullptr;
}
/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
@@ -4104,7 +4123,7 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
return Add;
if (Instruction *Or = foldSetClearBits(SI, Builder))
return Or;
- if (Instruction *Mul = foldSelectZeroOrMul(SI, *this))
+ if (Instruction *Mul = foldSelectZeroOrFixedOp(SI, *this))
return Mul;
// Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
diff --git a/llvm/test/Transforms/InstCombine/icmp-select.ll b/llvm/test/Transforms/InstCombine/icmp-select.ll
index a038731abbc48..c6c0ba385a6fd 100644
--- a/llvm/test/Transforms/InstCombine/icmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-select.ll
@@ -248,10 +248,9 @@ define i1 @icmp_select_implied_cond_relational_off_by_one(i8 %x, i8 %y) {
define i1 @umin_seq_comparison(i8 %x, i8 %y) {
; CHECK-LABEL: @umin_seq_comparison(
-; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[X:%.*]], 0
-; CHECK-NEXT: [[CMP21:%.*]] = icmp ule i8 [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[CMP2:%.*]] = select i1 [[CMP1]], i1 true, i1 [[CMP21]]
-; CHECK-NEXT: ret i1 [[CMP2]]
+; CHECK-NEXT: [[Y:%.*]] = freeze i8 [[Y1:%.*]]
+; CHECK-NEXT: [[CMP21:%.*]] = icmp ule i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT: ret i1 [[CMP21]]
;
%min = call i8 @llvm.umin.i8(i8 %x, i8 %y)
%cmp1 = icmp eq i8 %x, 0
diff --git a/llvm/test/Transforms/InstCombine/select-fixed-zero.ll b/llvm/test/Transforms/InstCombine/select-fixed-zero.ll
new file mode 100644
index 0000000000000..b41f443d6131e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-fixed-zero.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s --check-prefix=FIXED-ZERO
+
+; (select (icmp x, 0, eq), 0, (umin x, y)) -> (umin x, y)
+define i64 @umin_select(i64 %a, i64 %b) {
+; FIXED-ZERO-LABEL: @umin_select(
+; FIXED-ZERO-NEXT: [[B_FR:%.*]] = freeze i64 [[B:%.*]]
+; FIXED-ZERO-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[A:%.*]], i64 [[B_FR]])
+; FIXED-ZERO-NEXT: ret i64 [[UMIN]]
+;
+ %cond = icmp eq i64 %a, 0
+ %umin = call i64 @llvm.umin.i64(i64 %a, i64 %b)
+ %select = select i1 %cond, i64 0, i64 %umin
+ ret i64 %select
+}
+
+; (select (icmp x, 0, eq), 0, (mul x, y)) -> (mul x, y)
+define i64 @mul_select(i64 %a, i64 %b) {
+; FIXED-ZERO-LABEL: @mul_select(
+; FIXED-ZERO-NEXT: [[B_FR:%.*]] = freeze i64 [[B:%.*]]
+; FIXED-ZERO-NEXT: [[MUL:%.*]] = mul i64 [[A:%.*]], [[B_FR]]
+; FIXED-ZERO-NEXT: ret i64 [[MUL]]
+;
+ %cond = icmp eq i64 %a, 0
+ %mul = mul i64 %a, %b
+ %select = select i1 %cond, i64 0, i64 %mul
+ ret i64 %select
+}
+
+; (select (icmp x, 0, eq), 0, (shl x, y)) -> (shl x, y)
+define i64 @shl_select(i64 %a, i64 %b) {
+; FIXED-ZERO-LABEL: @shl_select(
+; FIXED-ZERO-NEXT: [[B_FR:%.*]] = freeze i64 [[B:%.*]]
+; FIXED-ZERO-NEXT: [[SHL:%.*]] = shl i64 [[A:%.*]], [[B_FR]]
+; FIXED-ZERO-NEXT: ret i64 [[SHL]]
+;
+ %cond = icmp eq i64 %a, 0
+ %shl = shl i64 %a, %b
+ %select = select i1 %cond, i64 0, i64 %shl
+ ret i64 %select
+}
+
+; (select (icmp x, 0, eq), 0, (and x, y)) -> (and x, y)
+define i64 @and_select(i64 %a, i64 %b) {
+; FIXED-ZERO-LABEL: @and_select(
+; FIXED-ZERO-NEXT: [[B_FR:%.*]] = freeze i64 [[B:%.*]]
+; FIXED-ZERO-NEXT: [[AND:%.*]] = and i64 [[A:%.*]], [[B_FR]]
+; FIXED-ZERO-NEXT: ret i64 [[AND]]
+;
+ %cond = icmp eq i64 %a, 0
+ %and = and i64 %a, %b
+ %select = select i1 %cond, i64 0, i64 %and
+ ret i64 %select
+}
+
+; (select (icmp x, 0, ne), (ashr x, y), 0) -> (ashr x, y)
+define i64 @ashr_select(i64 %a, i64 %b) {
+; FIXED-ZERO-LABEL: @ashr_select(
+; FIXED-ZERO-NEXT: [[B_FR:%.*]] = freeze i64 [[B:%.*]]
+; FIXED-ZERO-NEXT: [[ASHR:%.*]] = ashr i64 [[A:%.*]], [[B_FR]]
+; FIXED-ZERO-NEXT: ret i64 [[ASHR]]
+;
+ %cond = icmp ne i64 0, %a
+ %ashr = ashr i64 %a, %b
+ %select = select i1 %cond, i64 %ashr, i64 0
+ ret i64 %select
+}
+
+; (select (icmp x, 0, ne), (lshr x, y), 0) -> (lshr x, y)
+define i64 @lshr_select(i64 %a, i64 %b) {
+; FIXED-ZERO-LABEL: @lshr_select(
+; FIXED-ZERO-NEXT: [[B_FR:%.*]] = freeze i64 [[B:%.*]]
+; FIXED-ZERO-NEXT: [[LSHR:%.*]] = lshr i64 [[A:%.*]], [[B_FR]]
+; FIXED-ZERO-NEXT: ret i64 [[LSHR]]
+;
+ %cond = icmp ne i64 0, %a
+ %lshr = lshr i64 %a, %b
+ %select = select i1 %cond, i64 %lshr, i64 0
+ ret i64 %select
+}
+
+; (select (icmp x, 0, eq), 0, fshr(x, x, y)) -> fshr(x, x, y)
+define i64 @fshr_select(i64 %a, i64 %b) {
+; FIXED-ZERO-LABEL: @fshr_select(
+; FIXED-ZERO-NEXT: [[B_FR:%.*]] = freeze i64 [[B:%.*]]
+; FIXED-ZERO-NEXT: [[FSHR:%.*]] = call i64 @llvm.fshr.i64(i64 [[A:%.*]], i64 [[A]], i64 [[B_FR]])
+; FIXED-ZERO-NEXT: ret i64 [[FSHR]]
+;
+ %cond = icmp eq i64 %a, 0
+ %fshr = call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
+ %select = select i1 %cond, i64 0, i64 %fshr
+ ret i64 %select
+}
+
+; (select (icmp x, 0, eq), 0, (fshl x, x, y)) -> (fshl x, x, y)
+define i64 @fshl_select(i64 %a, i64 %b) {
+; FIXED-ZERO-LABEL: @fshl_select(
+; FIXED-ZERO-NEXT: [[B_FR:%.*]] = freeze i64 [[B:%.*]]
+; FIXED-ZERO-NEXT: [[FSHL:%.*]] = call i64 @llvm.fshl.i64(i64 [[A:%.*]], i64 [[A]], i64 [[B_FR]])
+; FIXED-ZERO-NEXT: ret i64 [[FSHL]]
+;
+ %cond = icmp eq i64 %a, 0
+ %fshl = call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
+ %select = select i1 %cond, i64 0, i64 %fshl
+ ret i64 %select
+}
+
+; (select (icmp x, 0, eq), 0, (fshr x, z, y)) -> leave as is
+define i64 @fshr_select_no_combine(i64 %a, i64 %b, i64 %c) {
+; FIXED-ZERO-LABEL: @fshr_select_no_combine(
+; FIXED-ZERO-NEXT: [[COND:%.*]] = icmp eq i64 [[A:%.*]], 0
+; FIXED-ZERO-NEXT: [[FSHR:%.*]] = call i64 @llvm.fshr.i64(i64 [[A]], i64 [[B:%.*]], i64 [[C:%.*]])
+; FIXED-ZERO-NEXT: [[SELECT:%.*]] = select i1 [[COND]], i64 0, i64 [[FSHR]]
+; FIXED-ZERO-NEXT: ret i64 [[SELECT]]
+;
+ %cond = icmp eq i64 %a, 0
+ %fshr = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+ %select = select i1 %cond, i64 0, i64 %fshr
+ ret i64 %select
+}
+
+; (select (icmp x, 0, eq), 0, (sdiv x, y)) -> (sdiv x, y)
+define i64 @sdiv_select(i64 %a, i64 %b) {
+; FIXED-ZERO-LABEL: @sdiv_select(
+; FIXED-ZERO-NEXT: [[B:%.*]] = freeze i64 [[B1:%.*]]
+; FIXED-ZERO-NEXT: [[DIV:%.*]] = sdiv i64 [[A:%.*]], [[B]]
+; FIXED-ZERO-NEXT: ret i64 [[DIV]]
+;
+ %cond = icmp eq i64 %a, 0
+ %div = sdiv i64 %a, %b
+ %select = select i1 %cond, i64 0, i64 %div
+ ret i64 %select
+}
+
+; (select (icmp x, 0, eq), 0, (udiv x, y)) -> (udiv x, y)
+define i64 @udiv_select(i64 %a, i64 %b) {
+; FIXED-ZERO-LABEL: @udiv_select(
+; FIXED-ZERO-NEXT: [[B:%.*]] = freeze i64 [[B1:%.*]]
+; FIXED-ZERO-NEXT: [[DIV:%.*]] = udiv i64 [[A:%.*]], [[B]]
+; FIXED-ZERO-NEXT: ret i64 [[DIV]]
+;
+ %cond = icmp eq i64 %a, 0
+ %div = udiv i64 %a, %b
+ %select = select i1 %cond, i64 0, i64 %div
+ ret i64 %select
+}
+
+; (select (icmp x, 0, eq), 0, (icmp x, 0, slt)) -> (icmp x, 0, slt)
+define i1 @icmp_slt_select(i64 %a) {
+; FIXED-ZERO-LABEL: @icmp_slt_select(
+; FIXED-ZERO-NEXT: [[ICMP:%.*]] = icmp slt i64 [[A:%.*]], 0
+; FIXED-ZERO-NEXT: ret i1 [[ICMP]]
+;
+ %cond = icmp eq i64 %a, 0
+ %icmp = icmp slt i64 %a, 0
+ %select = select i1 %cond, i1 0, i1 %icmp
+ ret i1 %select
+}
+
+; (select (icmp x, 0, eq), 0, (sub 0, x)) -> (sub 0, x)
+define i64 @sub_select(i64 %a) {
+; FIXED-ZERO-LABEL: @sub_select(
+; FIXED-ZERO-NEXT: [[SUB:%.*]] = sub i64 0, [[A:%.*]]
+; FIXED-ZERO-NEXT: ret i64 [[SUB]]
+;
+ %cond = icmp eq i64 %a, 0
+ %sub = sub i64 0, %a
+ %select = select i1 %cond, i64 0, i64 %sub
+ ret i64 %select
+}
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index ef5874ffd46ad..fa54b38d55171 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -893,10 +893,9 @@ define i32 @test56(i16 %x) {
define i32 @test57(i32 %x, i32 %y) {
; CHECK-LABEL: @test57(
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT: [[DOTAND:%.*]] = select i1 [[TOBOOL]], i32 0, i32 [[AND]]
-; CHECK-NEXT: ret i32 [[DOTAND]]
+; CHECK-NEXT: [[Y:%.*]] = freeze i32 [[Y1:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: ret i32 [[AND]]
;
%and = and i32 %x, %y
%tobool = icmp eq i32 %x, 0
@@ -2734,10 +2733,9 @@ define void @select_freeze_icmp_multuses(i32 %x, i32 %y) {
define i32 @pr47322_more_poisonous_replacement(i32 %arg) {
; CHECK-LABEL: @pr47322_more_poisonous_replacement(
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ARG:%.*]], 0
-; CHECK-NEXT: [[TRAILING:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
-; CHECK-NEXT: [[SHIFTED:%.*]] = lshr exact i32 [[ARG]], [[TRAILING]]
-; CHECK-NEXT: [[R1_SROA_0_1:%.*]] = select i1 [[CMP]], i32 0, i32 [[SHIFTED]]
+; CHECK-NEXT: [[TRAILING:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[ARG:%.*]], i1 true)
+; CHECK-NEXT: [[TRAILING_FR:%.*]] = freeze i32 [[TRAILING]]
+; CHECK-NEXT: [[R1_SROA_0_1:%.*]] = lshr exact i32 [[ARG]], [[TRAILING_FR]]
; CHECK-NEXT: ret i32 [[R1_SROA_0_1]]
;
%cmp = icmp eq i32 %arg, 0
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add alive2 proofs to the PR description?
…op(0) == 0 These cases can be optimized to just op(%x).
…s where op(0) == 0 Have to freeze any other operands to prevent poison from leaking. Re-uses the flow from the `mul`-specific version of this within the InstCombine pass.
Fixes bug in optimizing: ``` define <2 x i64> @php_url_encode_impl(i32 %0, ptr %p) { %2 = load <2 x i64>, ptr %p, align 16 %.not = icmp eq i32 %0, 0 %spec.select = select i1 %.not, <2 x i64> zeroinitializer, <2 x i64> %2 ret <2 x i64> %spec.select } ``` One side effect of the matching is that it guarantees that the types of the TrueV and the Conditional constant match, which is assumed by the later code.
m_c_UMin will also match with an icmp/select pattern which isn't desired here.
… too large we can create a `poison` value
… can create `undef` value
d01d1f2
to
3e8c74f
Compare
Rebased to latest |
} else if (match(FalseVal, m_SDiv(m_Specific(X), m_Value(Y))) || | ||
match(FalseVal, m_UDiv(m_Specific(X), m_Value(Y))) || | ||
match(FalseVal, m_SRem(m_Specific(X), m_Value(Y))) || | ||
match(FalseVal, m_URem(m_Specific(X), m_Value(Y)))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggested change:
- } else if (match(FalseVal, m_SDiv(m_Specific(X), m_Value(Y))) ||
- match(FalseVal, m_UDiv(m_Specific(X), m_Value(Y))) ||
- match(FalseVal, m_SRem(m_Specific(X), m_Value(Y))) ||
- match(FalseVal, m_URem(m_Specific(X), m_Value(Y)))) {
+ } else if (match(FalseVal, m_IDiv(m_Specific(X), m_Value(Y))) ||
+ match(FalseVal, m_IRem(m_Specific(X), m_Value(Y)))) {
Currently this optimization only occurs for `mul`, but this generalizes that for any operation that has a fixed point of `0`.

There is similar logic within the `EarlyCSE` pass, but that is stricter in terms of `poison` propagation so will not optimize for many operations.

Alive2 Proofs:
`and`:
https://alive2.llvm.org/ce/z/RraasX ; base-case
https://alive2.llvm.org/ce/z/gzfFTX ; commuted-case
https://alive2.llvm.org/ce/z/63XaoX ; compare against undef
https://alive2.llvm.org/ce/z/MVRVNd ; select undef
https://alive2.llvm.org/ce/z/2bsoYG ; vector
https://alive2.llvm.org/ce/z/xByeX- ; vector compare against undef
https://alive2.llvm.org/ce/z/zNdzmZ ; vector select undef
`fshl`:
https://alive2.llvm.org/ce/z/U3_PG3 ; base-case
https://alive2.llvm.org/ce/z/BWCnxT ; compare against undef
https://alive2.llvm.org/ce/z/8HGAE_ ; select undef
; vector times out
`fshr`:
https://alive2.llvm.org/ce/z/o6F47G ; base-case
https://alive2.llvm.org/ce/z/fVnBXy ; compare against undef
https://alive2.llvm.org/ce/z/suymYJ ; select undef
; vector times out
`umin`:
https://alive2.llvm.org/ce/z/GGMqf6 ; base-case
https://alive2.llvm.org/ce/z/6cx5-k ; commuted-case
https://alive2.llvm.org/ce/z/W5d9tz ; compare against undef
https://alive2.llvm.org/ce/z/nKbaUn ; select undef
https://alive2.llvm.org/ce/z/gxEGqc ; vector
https://alive2.llvm.org/ce/z/_SDpi_ ; vector compare against undef
`sdiv`:
https://alive2.llvm.org/ce/z/5XGs3q
`srem`:
https://alive2.llvm.org/ce/z/vXAnQM
`udiv`:
https://alive2.llvm.org/ce/z/e6_8Ug
`urem`:
https://alive2.llvm.org/ce/z/VmM2SL
`shl`:
https://alive2.llvm.org/ce/z/aCZr3u ; Argument with range
https://alive2.llvm.org/ce/z/YgDy8C ; Instruction with known bits
https://alive2.llvm.org/ce/z/6pIxR6 ; Constant
`lshr`:
https://alive2.llvm.org/ce/z/WCCBej
`ashr`:
https://alive2.llvm.org/ce/z/egV4TR